********************************************************************************
***
*** 	Replication files for Zhuang, Maiting. 2022.
***
*** 	"Intergovernmental Conflict and Censorship: Evidence from China's 
***
***		Anti-Corruption Campaign" 
***
***		Journal of the European Economic Association
***
********************************************************************************

* Start with no dataset in memory.
clear

* Root folder of the replication package. Replace the placeholder with a real
* path before running; the script reads and writes "$dir/data" and runs
* do-files from "$dir/code".
global dir "your directory path"

* Make the root folder the working directory.
cd "$dir"


********************************************************************************

*** 	Constructing Articles dataset

* Build the article-level dataset "$dir/data/articles.dta".
* Input:  of_articles.dta (several records may share the same article id `art`).
* Output: one row per `art` with per-article counts, shares, official
*         dummies and newspaper province information.

use "$dir/data/of_articles.dta", clear

* Record-level indicators for the four own_province x of_high cells.
* The 0.var / # forms are factor-variable operators used inside expressions.
* NOTE(review): presumably 0.of_high is the indicator for of_high==0 and #
* multiplies the two indicators - confirm, and check behavior with missings.
gen byte ophigh=own_province*of_high
gen byte oplow=own_province#0.of_high
gen byte nophigh=0.own_province*of_high
gen byte noplow=0.own_province#0.of_high

* Single categorical version of the four indicators above.
gen group=1 if ophigh==1
replace group=2 if nophigh==1
replace group=3 if oplow==1
replace group=4 if noplow==1

* Whole weeks corresponding to date_diff (floor, so negative offsets round down).
gen week_diff=floor(date_diff/7)

* Per-article totals of each cell indicator.
bys art: egen ownhigh=total(ophigh)
bys art: egen ownlow=total(oplow)
bys art: egen nownhigh=total(nophigh)
bys art: egen nownlow=total(noplow)

* Number of records per article (with non-missing art_date).
bys art: egen count=count(art_date)

* Per-article total of of_high.
bys art: egen high=total(of_high)

* Per-article mean of post.
bys art: egen art_post=mean(post)

* Shares of the article's records falling in each category.
gen share_oh=ownhigh/count
gen share_ol=ownlow/count
gen share_high=high/count
gen share_low=(count-high)/count

* One indicator per value of `of`, lifted to article level: dum_of_num# is 1
* if any record of the article has that value. The record-level indicator is
* dropped once its article-level version exists.
tab of, gen(of_num)
qui describe of_num*,varlist
local master `r(varlist)'
foreach x of local master{
	bys art: egen byte dum_`x'=max(`x')
	drop `x'
}

* Flag articles that have more than one record.
gen byte art_multipleofficial=0
replace art_multipleofficial=1 if count>1

* Collapse to one row per article; `force` keeps one row of each group even
* though the remaining record-level variables may differ across the group.
duplicates drop art, force

* Attach newspaper province information.
* NOTE(review): unlike the other merges in this file, using-only rows
* (_merge==2) are not dropped here, so newspapers without any article would
* stay as rows with missing article variables - confirm this is intended.
merge m:1 np_name_cons using  "$dir/data/newspapers.dta", keepusing(np_province1 np_province2 np_province3 np_subprovincial )
drop _merge

* holidays.do is not part of this file; it is executed on the data in memory.
run "$dir/code/holidays.do"

compress

label variable art_words "Number of words"
label variable art_image "Incl. image"
label variable art_page "Page number"
label variable art_multipleofficial "Multiple officials"
label variable art_headline_copy "Announcement copy (in headline)"
label variable art_hlcorrupt "Corruption (in headline)"
label variable art_hlccdi "Campaign (in headline)"
label variable art_articles_bal "Positive to negative words"
label variable art_txtdenounce "Citizen complaints (in text)"

* `replace` so that the script can be re-run when articles.dta already exists
* (without it the second run stops with "file already exists").
save "$dir/data/articles.dta", replace

********************************************************************************

*** 	Constructing Official-Newspaper-Day Panel and other data

* Note: This generates a large panel dataset

* Build the official x newspaper x day skeleton saved as ofnppanel.dta.
* Each row of officialnewspaper.dta is replicated once per day offset
* (date_diff) and then trimmed to the newspaper's np_start / np_end window.

* `clear` lets this section be run on its own, whatever is in memory.
use "$dir/data/officialnewspaper.dta", clear

* 1097 copies of every row: one per day offset from -732 to +364.
expand 1097

* Number the copies within each official-newspaper pair, starting at -732.
* Grouping on BOTH keys matters: detecting the start of a group from a change
* in np_name_cons alone would let the counter run on when two consecutive
* officials share the same boundary newspaper.
* NOTE(review): assumes officialnewspaper.dta has one row per (of, np_name_cons);
* with repeated pairs date_diff would run past 364 - confirm.
bysort of np_name_cons: gen int date_diff = _n - 733

* Bring in of_date from the officials file; unmatched officials from the
* using file are discarded.
merge m:1 of using "$dir/data/officials",keepusing(of_date)
drop if _merge==2
drop _merge

compress

* Calendar date of each panel day.
gen int art_date = of_date + date_diff
format art_date %td

* Restrict to the window given by np_start / np_end (when non-missing).
merge m:1 np_name_cons using "$dir/data/newspapers.dta", keepusing(np_start np_end)
drop if _merge==2
drop _merge

drop if art_date<np_start & np_start!=.
drop if art_date>np_end & np_end!=.
drop np_end np_start

compress

save "$dir/data/ofnppanel.dta", replace

***

* Count records per official x newspaper x date in of_articles.dta and attach
* the counts to the day-level skeleton stored in ofnppanel.dta.

use "$dir/data/of_articles.dta", clear

* number = records for this official in this newspaper on this date.
bysort of np_name_cons art_date: egen byte number = count(art_date)

* One row per official-newspaper-date, keeping only keys and the count.
duplicates drop of np_name_cons art_date, force
keep of np_name_cons art_date number date_diff

compress

* Keep every skeleton cell (matched or skeleton-only); rows that exist only in
* the article data are discarded. Skeleton-only cells get a count of zero.
merge 1:1 of np_name_cons date_diff using "$dir/data/ofnppanel.dta", keep(match using)
replace number = 0 if _merge==2
drop _merge

compress

save "$dir/data/ofnppanel.dta", replace


***

* Enrich the panel currently in memory (official x newspaper x day) with
* official and newspaper attributes, outcome and treatment indicators and
* labels, then overwrite ofnppanel.dta with the result.

* Official and newspaper attributes; rows found only in the using files are
* not kept.
merge m:1 of using "$dir/data/officials.dta", keep(master match) nogenerate
merge m:1 np_name_cons using "$dir/data/newspapers.dta", keep(master match) nogenerate

* 1 when the official's province equals any of the newspaper's three provinces.
gen byte own_province = (of_province==np_province1 | of_province==np_province2 | of_province==np_province3)

* 0 = other province, 1 = same province, 2 = same province and same city.
gen byte opc = own_province
replace opc = 2 if own_province==1 & (np_city1==of_city | np_city2==of_city)

label define opcdef 0 "" 1 "Different City" 2 "Same City"
label values opc opcdef

drop (*city* *district* np_govtowned np_partypaper)

compress

* Any record on this day (number > 0).
gen byte article = (number>0)

* Calendar components of the panel date.
gen byte dofw = dow(art_date)
gen byte month = month(art_date)
gen int year = year(art_date)

* Numeric ids for newspapers and for official-newspaper pairs.
* NOTE(review): a byte holds values up to 100 only - confirm the number of
* distinct np_name_cons values fits.
egen byte np = group(np_name_cons)
egen onid = group (of_name np_name_cons)

* Days at or after day offset zero.
gen byte post = (date_diff>=0)

label define post 0 "" 1 "Post-scandal"
label values post post
label define sp 0 "" 1 "Same province"
label values own_province sp

* Combinations of the np_gz / np_gd newspaper flags with of_wqlgroup.
gen byte gzwan = (np_gz==1 & of_wqlgroup==1)
gen byte gdwan = (np_gd==1 & of_wqlgroup==1)
gen byte gznwan = (np_gz==1 & of_wqlgroup==0)
gen byte gdnwan = (np_gd==1 & of_wqlgroup==0)

label define gzwan 0 "" 1 "Wan OF x Guangzhou NP"
label define gznwan 0 "" 1 "other OF x Guangzhou NP"
label define gdwan 0 "" 1 "Wan OF x Guangdong NP"
label define gdnwan 0 "" 1 "other OF x Guangdong NP"

* Each variable gets the value label carrying its own name. No label named
* np_gz or np_gd is defined in this file.
foreach v in np_gz np_gd gzwan gznwan gdwan gdnwan {
	label values `v' `v'
}

* 0 before, 2 from the cut-off date onwards; 19901 is 27jun2014 in Stata's
* daily-date encoding.
gen byte time = cond(art_date<19901, 0, 2)

label define time 0 "pre Wan-investigation"  2 "post Wan-investigation"
label values time time

* holidays.do is not part of this file; it is executed on the data in memory.
run "$dir/code/holidays.do"

* Interaction terms built with factor-variable level operators.
gen byte ophigh_ftx =1.of_high * 1.post * 1.own_province * of_faction_ftx_n 
gen byte high_ftx =1.of_high * 1.post *  of_faction_ftx_n 

gen byte ophigh =1.of_high * 1.post * 1.own_province 
gen byte high =1.of_high * 1.post  

label variable article "Article"
label variable number  "Number of articles"
label variable own_province "Same province"

save "$dir/data/ofnppanel.dta", replace


********************************************************************************

**** Datasets for other charts and tables

* Three derived datasets, each built from the full panel in memory inside its
* own preserve/restore so the panel itself is left untouched.

* (1) np_by_date.dta: one row per date x newspaper.
preserve 

duplicates drop art_date np_name_cons,force 

save "$dir/data/np_by_date.dta", replace

restore

***

* (2) chartdata.dta: mean of `article` for every
*     date_diff x own_province x of_high cell.
preserve

keep article date_diff own_province of_high np_name_cons

bys date_diff own_province of_high: egen article_mean_split=mean(article) 

duplicates drop date_diff own_province of_high,force

save "$dir/data/chartdata.dta",replace

restore

***

* (3) ofnppost.dta: mean of `article` per newspaper x official x post.
preserve

bys np_name_cons of post: egen article_post=mean(article)

duplicates drop np_name_cons of post, force

keep np_name_cons of post article_post np_province* of_high own_province of_province

* `replace` so that re-running the script does not stop with
* "file already exists"; matches the other saves in this section.
save "$dir/data/ofnppost.dta", replace

restore

***

* Aggregate the daily panel in memory to official x newspaper x week and
* official x newspaper x month datasets (ofnppanel_week.dta, ofnppanel_month.dta).

keep of number np_name_cons article month* np onid art_date date* of_high own_province  ccpprov lhprov year dofw post

* Calendar week / month of the panel date.
gen art_week = wofd(art_date)
gen art_month = mofd(art_date)

* Week / month offsets: blocks of 7 and 30 days of date_diff.
gen week_diff=floor(date_diff/7)
gen month_diff=floor(date_diff/30)

* Within each calendar week / month and official-newspaper pair, the mean of
* the daily date_* and ccpprov / lhprov variables.
foreach x in week month{
	foreach t in cny duanwu zhongqiu{
		bys art_`x' of np_name_cons: egen mean_`x'_`t' = mean(date_`t'_broad)
	}
	foreach tc in congress committee zhengxie renda{
		bys art_`x' of np_name_cons: egen mean_`x'_`tc' = mean(date_`tc')
	}
	foreach tp in ccpprov lhprov{
		bys art_`x' of np_name_cons: egen mean_`x'_`tp'=mean(`tp')
	}
}

* Totals of `number` per week / month offset and an any-record indicator.
foreach x in week month{
	bys `x'_diff of np_name_cons: egen number_`x' = total(number)
	gen byte article_`x'=0
	replace article_`x'=1 if number_`x'>0
}

* NOTE(review): both files come from one preserve block, so the monthly file
* is deduplicated from the already week-deduplicated rows. Variables that are
* constant within a month_diff group are unaffected; for any other variable
* the retained row depends on the data order at that point - confirm this is
* acceptable for the monthly file.
preserve
foreach x in week month{
	duplicates drop of np_name_cons `x'_diff,force
	* `replace` so that re-running the script does not stop with
	* "file already exists".
	save "$dir/data/ofnppanel_`x'.dta", replace
}
restore

********************************************************************************

*** Datasets for Table 4

* of_timeline.dta: for every of_date x of_province cell, the total of of_high
* (c_of_high) and the remaining non-missing of_high observations (c_of_low).

use "$dir/data/officials.dta", clear

* Non-missing of_high observations and their sum within each cell.
bysort of_date of_province: egen c_of_total = count(of_high)
bysort of_date of_province: egen c_of_high = total(of_high)
gen c_of_low = c_of_total - c_of_high

* Only the cell keys and the two counts are needed; the intermediate total
* is not saved.
keep of_date of_province c_of_high c_of_low

* One row per cell.
duplicates drop of_date of_province, force

* Generic key names used by the timeline files.
rename of_date date
rename of_province province_name

compress

save "$dir/data/of_timeline.dta",replace

***

* np_timeline.dta: per date x province, the number of newspapers minus the
* total of np_subprovincial (np_count_prov).

use "$dir/data/np_by_date.dta", clear

* Keep only rows whose newspaper is found in newspapers.dta.
merge m:1 np_name_cons using "$dir/data/newspapers.dta", keepusing(np_province* np_subprovincial) keep(match) nogenerate

* 第一财经日报 is additionally counted under 北京 and 广东: the first expand
* creates a copy assigned to 北京, the second copies that copy and assigns
* it to 广东.
expand 2 if np_name_cons=="第一财经日报", gen(exp)
replace np_province1 = "北京" if exp==1
expand 2 if exp==1, gen(exp2)
replace np_province1 = "广东" if exp2==1
drop exp exp2 np_province2 np_province3

* From here on np_province abbreviates the remaining np_province1.
bysort art_date np_province: egen np_count = count(np_name_cons)
bysort art_date np_province: egen np_count_subprov = total(np_subprovincial)
gen np_count_prov = np_count - np_count_subprov

* One row per date x province.
duplicates drop art_date np_province, force

compress

* Generic key names used by the timeline files.
rename art_date date
rename np_province province_name

keep date province_name np_count_prov

save "$dir/data/np_timeline.dta",replace

***

* art_timeline.dta: per date x province, the number of newspapers
* (np_subprovincial==0 only) that have at least one art_post==1 article of
* each type on that date (nps_* variables).

use "$dir/data/articles.dta",clear

keep if art_post==1

keep if np_subprovincial==0

drop dum_of_num*

* 第一财经日报 is additionally counted under 北京 and 广东: the first expand
* creates a copy assigned to 北京, the second copies that copy and assigns
* it to 广东.
expand 2 if np_name_cons=="第一财经日报",gen(exp)
replace np_province1="北京" if exp==1
expand 2 if exp==1, gen(exp2)
replace np_province1="广东" if exp2==1
drop exp exp2 

* From here on np_province abbreviates the remaining np_province1.
drop np_province2 np_province3

* Newspaper-day dummies: 1 if the newspaper has any article on that date with
* a positive ownhigh / ownlow / nownhigh / nownlow count. The expanded copies
* inflate the totals but not the >0 dummies.
foreach o in own nown{
	foreach x in high low{
		bys np_name_cons art_date: egen articles_`o'`x'=total(`o'`x')
		gen dum_`o'`x'=0
		replace dum_`o'`x'=1 if articles_`o'`x'>0
		drop articles_`o'`x'
	}
}

* Same for the three article-content flags.
bys np_name_cons art_date: egen articles_denounce=total(art_txtdenounce)
bys np_name_cons art_date: egen articles_corrupt=total(art_hlcorrupt)
bys np_name_cons art_date: egen articles_ccdi=total(art_hlccdi)

foreach x in denounce corrupt ccdi{
	gen dum_`x'=0
	replace dum_`x'=1 if articles_`x'>0
	drop articles_`x'
}

* One row per newspaper x province x date. The province must be part of the
* key: deduplicating on newspaper and date alone would discard the province
* copies created by the expand above and leave 第一财经日报 in a single,
* order-dependent province.
duplicates drop np_name_cons np_province art_date, force

* Province-day totals of every newspaper-day dummy: dum<suffix> becomes
* nps<suffix> (e.g. dum_ownhigh -> nps_ownhigh); the dummy itself is dropped.
quietly describe dum*,varlist
local master `r(varlist)'
foreach x of local master{
	local suffix = substr("`x'", 4, .)
	bys np_province art_date: egen nps`suffix'=total(`x')
	drop `x'
}

* One row per date x province.
duplicates drop art_date np_province,force
keep art_date np_province  nps* 

* Generic key names used by the timeline files.
rename art_date date
rename np_province province_name

save "$dir/data/art_timeline.dta",replace
